* Created by BBG 2023/03
*_____________________________________________________________________________________________________________________________________________________
*
**# OVERALL PLAN
*_____________________________________________________________________________________________________________________________________________________

/*	Note that each section explains what is going on in a little bit more detail:
	GLOBALS & FILE LOCATIONS 				Where to find datafiles, plus other locations and settings
	CLEANING								My data cleaning
	QUICK CHECKS 							Misc things - missing variable count / prevalence of each individual dis measure / main analyses in test formation

*/
	
*_____________________________________________________________________________________________________________________________________________________
*
**# GLOBALS AND FILE LOCATIONS
*_____________________________________________________________________________________________________________________________________________________

* Purpose: clear old globals, work out which machine we are on, then run the start-up,
* initialising and globals do-files so that the file-location globals are available.
macro drop _all						// Must do this BEFORE setting the dir globals below!
*do "C:\Users\k2256879\OneDrive - King's College London\Disability work\ESRC Future Leaders Disability\Phase 1 (Dis Emp Rates) - Intl\EHIS\0_globals.do"
* Try each known user folder in turn; the last one that exists becomes ${user}
* (and is left as the working directory, as before).
foreach candidate in "C:\Users\k2256879" "C:\Users\benba" {
	capture cd "`candidate'"
	if _rc==0	global user 	"`candidate'"
}
* Stop with a clear message if no folder was found, rather than letting the
* do-file paths below fail with an empty ${user}.
if `"${user}"'=="" {
	dis as error "No known user folder found (tried C:\Users\k2256879 and C:\Users\benba) - add this machine's folder to the list above"
	exit 601
}
// Initialising is kept in its own do-file because it clears everything,
// and the globals need to be re-runnable without triggering that.
do "${user}\OneDrive - King's College London\Personal\ADO files\StataStartup.do"
do "${user}\OneDrive - King's College London\Disability work\ESRC Future Leaders Disability\Phase 1 (Dis Emp Rates) - Intl\ELSA-SHARE-HRS\0_initialising.do" // can just use the one from ELSA-SHARE, it's generic
// globals
do "${user}\OneDrive - King's College London\Disability work\ESRC Future Leaders Disability\Phase 1 (Dis Emp Rates) - Intl\EHIS\0_globals.do"				
dis in red "File location and variable list globals are loaded"
* Running this file top-to-bottom stops here, once the globals are loaded.
* NOTE(review): the sections below are presumably run by selection - confirm.
exit



*_____________________________________________________________________________________________________________________________________________________
*
**# DATA CLEANING
*_____________________________________________________________________________________________________________________________________________________

* Run the per-wave cleaning do-files, wave 3 first and then wave 2.
* Per the original notes, each one saves (with replace) its cleaned single-wave file:
*	"${EHIS}\EHIS wave 3\0_w3cleaned ${versno}.dta"
*	"${EHIS}\EHIS wave 2\0_w2cleaned ${versno}.dta"
foreach w of numlist 3 2 {
	do "${dodir}\1_prep_EHIS`w'.do"
}
/*	THE INDIVIDUAL EHIS WAVES CAN THEN BE OPENED WITH
	use "${EHIS}\EHIS wave 2\0_w2cleaned ${versno}.dta", clear
	use "${EHIS}\EHIS wave 3\0_w3cleaned ${versno}.dta", clear
*/

// Combined wave 2 + wave 3 file
* Stacks the cleaned wave 3 and wave 2 files, rebuilds the country code and wave indicator,
* drops variables/countries that are not comparable across waves, and saves the combined file.
use "${EHIS}\EHIS wave 3\0_w3cleaned ${versno}.dta", clear		// clear is the documented option for -use-
append using "${EHIS}\EHIS wave 2\0_w2cleaned ${versno}.dta", gen(wave)		// wave = 0 for the w3 file in memory, 1 for the appended w2 file
	* Countrynum needs recreating, as otherwise the numbers refer to different countries in different waves
	drop countrynum
	encode country, gen(countrynum)
	order countrynum, after(country)
	label var countrynum "dv Country as labelled categories rather than string"
	* Need a 'wave' indicator: turn the 0/1 source marker from -append- into the wave number
	recode wave (0=3)(1=2)
	label var wave		"EHIS wave2 vs. 3"
	order wave, before(proxy)
* Data cleaning on combined file
	label var wgt 		"Frequency weight (inc. proxy interviews)"
	recode hs1 (-3 -1=.r) 	// for -3, see the note on PHQ for w3 in 1_prep.do (hs2 is not recoded as it is dropped just below)
	drop mainstat			// qs that are not quite comparable between waves 
	drop pc1a-ha3			// qs not being used here
	drop wgt_spec			// not available w2
	drop hs2				// This looks very, very strange - esp. in SE where completely reversed, but also comparatively across countries. Not sure if it was coded inconsistently in different countries...
* Consistency of method between waves
	bysort country: tab intmethod wave, col nof
	drop if inlist(country,"DE","IE","PT")			// change in survey mode (or totally unclear in case of PT)
	drop if inlist(country,"FR","RS")				// no trend data
	* Flag (1 vs. missing) the countries kept despite a limited change in survey mode
	gen byte modeflag = 1 if inlist(country,"EE","ES","HR","HU","LT","MT","SE","UK")
		label var modeflag	"dv Limited changes in survey mode (see notes)"
		notes modeflag: EE balance between mixed modes changes
		notes modeflag: ES 1/3 by phone at w3 (rest f2f)
		notes modeflag: HR 1/4 by phone at w2 (rest f2f)
		notes modeflag: HU 1/3 by s/c at w3 (rest f2f) 
		notes modeflag: LT 1/2 by s/c or phone at w3 (rest f2f)
		notes modeflag: MT 1/3 by phone at w3 (rest f2f)
		notes modeflag: SE 1/8 by phone at w2 (rest s/c)
		notes modeflag: UK 1/4 by f2f at w2 (rest phone)
	order modeflag, after(intmethod)
* Saving
compress
save "${EHIS}\EHIS wave 3\0_w2w3combined ${versno}.dta", replace
	

			 
*_____________________________________________________________________________________________________________________________________________________
*
**# 5. COMPARISON OF EHIS TO ELSA-SHARE (non-bootstrap only, as doesn't use probabilistic dis)
*_____________________________________________________________________________________________________________________________________________________

* Run the two section-5 do-files in order: the master file first, then the non-bootstrap analysis.
foreach part in 0master nonbootstrap {
	do "${dodir}\5_EHISandSHARE_`part'.do"
}


*_____________________________________________________________________________________________________________________________________________________
*
**# 6. BOOTSTRAPPED ANALYSIS at w3 (ALL AGES, effectively the same syntax as ELSA-SHARE analysis)
*_____________________________________________________________________________________________________________________________________________________

* Runs the master do-file for the all-ages analysis (bootstrapped, wave 3, per the section heading).
* NOTE(review): relies on the dodir global, presumably defined in 0_globals.do - confirm.
do "${dodir}\6_allages_0master.do"				
			 
					 
*_____________________________________________________________________________________________________________________________________________________
*
**# TREND OVER TIME (bootstrapped contrast, requires different bootstrap file)
*_____________________________________________________________________________________________________________________________________________________

* Runs the master do-file for the trend-over-time analysis (bootstrapped contrast, per the section heading).
* NOTE(review): relies on the dodir global, presumably defined in 0_globals.do - confirm.
do "${dodir}\7_EHIStrend_0master.do"				
/* Exploratory data analysis to figure out why some of the countries change so much:
	use "${EHIS}\EHIS wave 3\0_w3cleaned ${versno}.dta", replace
		recode hs1 (-3.=.p) (-1=.r)
		svy: prop llsiB severellsiB hs1 if country=="LT"
	use "${EHIS}\EHIS wave 2\0_w2cleaned ${versno}.dta", replace
		recode hs1 (-3.=.p) (-1=.r)
		svy: prop llsiB severellsiB hs1 if country=="LT"
